{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "1. DateFrame介绍（略）"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "2. DataFrame构建"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-07-15T01:52:41.559055100Z",
     "start_time": "2024-07-15T01:52:41.308997400Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "   a  b   c\n0  1  5   9\n1  2  6  10\n2  3  7  11\n3  4  8  12",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n      <th>b</th>\n      <th>c</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>5</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>6</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>7</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>8</td>\n      <td>12</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 数组、列表或元组构成的字典构造dataframe\n",
    "# 构造一个字典\n",
    "data = {'a': [1, 2, 3, 4],\n",
    "        'b': (5, 6, 7, 8),\n",
    "        'c': np.arange(9, 13)}\n",
    "# 构造dataframe\n",
    "frame = pd.DataFrame(data)\n",
    "frame"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T01:56:07.955996600Z",
     "start_time": "2024-07-15T01:56:07.949760700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "data": {
      "text/plain": "RangeIndex(start=0, stop=4, step=1)"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# index属性查看行索引\n",
    "frame.index"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T01:58:34.661084Z",
     "start_time": "2024-07-15T01:58:34.655573700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "data": {
      "text/plain": "Index(['a', 'b', 'c'], dtype='object')"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 列索引用columns\n",
    "frame.columns"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:00:01.681446300Z",
     "start_time": "2024-07-15T02:00:01.676448300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1  5  9]\n",
      " [ 2  6 10]\n",
      " [ 3  7 11]\n",
      " [ 4  8 12]]\n"
     ]
    }
   ],
   "source": [
    "# values属性查看值\n",
    "print(frame.values)  # 似乎是数组"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:02:47.477910300Z",
     "start_time": "2024-07-15T02:02:47.474400400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "data": {
      "text/plain": "   a  b   c\nA  1  5   9\nB  2  6  10\nC  3  7  11\nD  4  8  12",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n      <th>b</th>\n      <th>c</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>A</th>\n      <td>1</td>\n      <td>5</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>B</th>\n      <td>2</td>\n      <td>6</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>C</th>\n      <td>3</td>\n      <td>7</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>D</th>\n      <td>4</td>\n      <td>8</td>\n      <td>12</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 指定index\n",
    "frame = pd.DataFrame(data, index=['A', 'B', 'C', 'D'])\n",
    "frame"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:03:59.090282800Z",
     "start_time": "2024-07-15T02:03:59.085190500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "data": {
      "text/plain": "   a  b   c    d\nA  1  5   9  NaN\nB  2  6  10  NaN\nC  3  7  11  NaN\nD  4  8  12  NaN",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n      <th>b</th>\n      <th>c</th>\n      <th>d</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>A</th>\n      <td>1</td>\n      <td>5</td>\n      <td>9</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>B</th>\n      <td>2</td>\n      <td>6</td>\n      <td>10</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>C</th>\n      <td>3</td>\n      <td>7</td>\n      <td>11</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>D</th>\n      <td>4</td>\n      <td>8</td>\n      <td>12</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame(data, index=['A', 'B', 'C', 'D'],\n",
    "                     columns=['a', 'b', 'c', 'd'])\n",
    "frame"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:06:09.141064Z",
     "start_time": "2024-07-15T02:06:09.134980800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "data": {
      "text/plain": "   a    b\n0  0  3.0\n1  1  4.0\n2  2  NaN",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n      <th>b</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>3.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.Series构成的字典构造dataframe\n",
    "pd1 = pd.DataFrame({'a': pd.Series(np.arange(3)),\n",
    "                    'b': pd.Series(np.arange(3, 5))})\n",
    "pd1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:12:31.422439600Z",
     "start_time": "2024-07-15T02:12:31.419924300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a  b    c\n",
      "apple   3.6  3  3.2\n",
      "banana  5.6  5  NaN\n"
     ]
    }
   ],
   "source": [
    "# 3.字典构成的字典构造dataframe\n",
    "# 字典嵌套\n",
    "data1 = {\n",
    "    'a': {'apple': 3.6, 'banana': 5.6},\n",
    "    'b': {'apple': 3, 'banana': 5},\n",
    "    'c': {'apple': 3.2}\n",
    "}\n",
    "pd2 = pd.DataFrame(data1)\n",
    "print(pd2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:17:41.601289800Z",
     "start_time": "2024-07-15T02:17:41.594265300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 2.1 2D ndarray构造的dataframe"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [
    {
     "data": {
      "text/plain": "   0   1   2\n0  0   1   2\n1  3   4   5\n2  6   7   8\n3  9  10  11",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>3</td>\n      <td>4</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>6</td>\n      <td>7</td>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>9</td>\n      <td>10</td>\n      <td>11</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构造二维数组对象\n",
    "arr1 = np.arange(12).reshape(4, 3)\n",
    "frame1 = pd.DataFrame(arr1)\n",
    "frame1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:43:04.072378800Z",
     "start_time": "2024-07-15T02:43:04.068871800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 2.2字典构成的列表构造dataframe"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   apple  banana\n",
      "0    3.6     5.6\n",
      "1    3.0     5.0\n",
      "2    3.2     NaN\n"
     ]
    }
   ],
   "source": [
    "# 内部key变为列索引，但是字典嵌套字典是行索引\n",
    "l1 = [{'apple': 3.6, 'banana': 5.6},\n",
    "      {'apple': 3, 'banana': 5},\n",
    "      {'apple': 3.2}]\n",
    "pd3 = pd.DataFrame(l1)\n",
    "print(pd3)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:47:29.352124Z",
     "start_time": "2024-07-15T02:47:29.339124500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [
    {
     "data": {
      "text/plain": "          0         1         2         3         4\n0  0.317684  0.513301  0.710612       NaN       NaN\n1  0.269660  0.866278       NaN       NaN       NaN\n2  0.415622  0.828641  0.844279  0.084828  0.165376",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n      <th>3</th>\n      <th>4</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0.317684</td>\n      <td>0.513301</td>\n      <td>0.710612</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>0.269660</td>\n      <td>0.866278</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>0.415622</td>\n      <td>0.828641</td>\n      <td>0.844279</td>\n      <td>0.084828</td>\n      <td>0.165376</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.3 Series构成的列表构造dataframe\n",
    "l2 = [pd.Series(np.random.rand(3)),\n",
    "      pd.Series(np.random.rand(2)),\n",
    "      pd.Series(np.random.rand(5))]\n",
    "pd4 = pd.DataFrame(l2)\n",
    "pd4"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:54:24.827050300Z",
     "start_time": "2024-07-15T02:54:24.822819800Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "3 DataFrame的基本用法"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [
    {
     "data": {
      "text/plain": "   A  B  C\na  0  1  2\nb  3  4  5\nc  6  7  8",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>A</th>\n      <th>B</th>\n      <th>C</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>3</td>\n      <td>4</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>6</td>\n      <td>7</td>\n      <td>8</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd5 = pd.DataFrame(np.arange(9).reshape(3, 3), index=['a', 'b', 'c'], columns=['A', 'B', 'C'])\n",
    "pd5"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T03:02:58.359480900Z",
     "start_time": "2024-07-15T03:02:58.346949600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "data": {
      "text/plain": "   a  b  c\nA  0  3  6\nB  1  4  7\nC  2  5  8",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n      <th>b</th>\n      <th>c</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>A</th>\n      <td>0</td>\n      <td>3</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>B</th>\n      <td>1</td>\n      <td>4</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>C</th>\n      <td>2</td>\n      <td>5</td>\n      <td>8</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 转置\n",
    "pd5.T"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T03:03:38.229629400Z",
     "start_time": "2024-07-15T03:03:38.217120500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    3\n",
      "c    6\n",
      "Name: A, dtype: int64\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "print(pd5['A'])\n",
    "print(type(pd5['A']))  # 居然是Series类型"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T03:05:03.774021300Z",
     "start_time": "2024-07-15T03:05:03.770967700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "outputs": [
    {
     "data": {
      "text/plain": "   A  B  C  D\na  0  1  2  1\nb  3  4  5  2\nc  6  7  8  3",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>A</th>\n      <th>B</th>\n      <th>C</th>\n      <th>D</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>3</td>\n      <td>4</td>\n      <td>5</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>6</td>\n      <td>7</td>\n      <td>8</td>\n      <td>3</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd5['D'] = [1, 2, 3]\n",
    "pd5"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T03:08:51.738554100Z",
     "start_time": "2024-07-15T03:08:51.725487500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
